***This do-file generates List 3 based on the following matching rule
***List 3 matching covariates (everything in List 2 + age, care)

******************First need imputation (similar imputation as List 1, but add blockgroup missing imputation)**************************

*****Important note: multi family can be done in 5 hours, but single family cannot (need to run on Hoffman2)

*cd /u/scratch/y/yatingch
*cd /u/home/y/yatingch
****Update 3/7/2018 because problem of 2012 data (after matching part)

* Work from the project data directory and disable -more- pauses so the
* do-file can run unattended.
cd V:\PIER_Data\Yating\Data
set more off

* Install psmatch2 from SSC only when it is not already available.
* An unconditional -ssc install- needs network access on every run and aborts
* the whole do-file when the download fails (e.g. on an offline compute node).
capture which psmatch2
if _rc {
	ssc install psmatch2, replace
}


**************************************First do IMPUTATION for easier match (otherwise missing values will drop certain matches under a subset) **********

/*
. tab usetype

          usetype |      Freq.     Percent        Cum.
------------------+-----------------------------------
            condo |  1,311,658       11.88       11.88
        mixed_use |     59,101        0.54       12.41
     multi_family |  3,599,603       32.60       45.01
residential_other |    226,725        2.05       47.07
    single_family |  5,844,928       52.93      100.00
------------------+-----------------------------------
            Total | 11,042,015      100.00
*/
***(Check out preanalysis_matching3_List1_1015 for code)

*************************************************************END of IMPUTATION****************************************************************************

***********************************************************END of IMPUTATION****************************************************************************
*/

*****************************************Matching algorithm for List 3************************************************
****************************************single_family ************************************
* ---------------------------------------------------------------------------
* single_family: run psmatch2 Mahalanobis matching of ee on the List 3
* covariates separately inside every vintage_id x climate_zones_id x
* sqft_quentile cell, and stack the per-cell results into one file.
*   - cells with more than 30 ee==1 accounts are matched;
*   - cells with 30 or fewer are kept unmatched and flagged insufficient_ee=1;
*   - empty cells are skipped.
* Output: ./matchList3/matchList3_single_family_1015
* ---------------------------------------------------------------------------
cd V:\PIER_Data\Yating\Data
cap log close
log using .\test_single_family_matching_List3.log, replace
set more off
cap tempfile temp1 temp2

* Seed the accumulator (temp2) with an empty placeholder dataset so that the
* first -append- inside the loop has something to append to.
clear
gen test=.
save `temp2',replace

use "./account_usetype_parcel_block_single_family_100sqftImpute",clear
tempfile impute

***new (deal with those missing)
* Recode missing cell identifiers to 0 so those accounts form their own cells
* instead of never matching any loop value.
foreach x in sqft_quentile climate_zones_id vintage_id{
 count if `x'==.
 if r(N)>0 {
	replace `x'=0 if `x'==.
	}
}
save `impute', replace

* The covariate list does not change between cells, so define it once.
local covariates1 "median_income density poverty_pop white black asian hispanic prct_pop25_morebachelor prct_owner occupancy_rate reside x y age care pool_yes"

dis "$S_DATE, $S_TIME code start"
forvalues vin=0/4{
	foreach climate in 0 6 11 12 13 14 15 16{
		forvalues sqft=0/100{
			dis "$S_DATE, $S_TIME loop1"
			* Load only the current cell rather than the full file followed by -keep if-
			use if sqft_quentile==`sqft' & climate_zones_id==`climate' & vintage_id==`vin' using `impute', clear
			*gen ee_1012=1 if ee_account=="t"
			*replace ee_1012=0 if ee_account=="f"
			count
			local n_cell = r(N)
			dis "obervation in sqft_quentile==`sqft'& climate_zones_id==`climate'& vintage_id==`vin'"
			if `n_cell'>0 {
				dis "$S_DATE, $S_TIME before matching"
				count if ee==1
				local n_treated = r(N)
				if `n_treated'>30 {
					xi: psmatch2 ee, mahal(`covariates1')
					dis "$S_DATE, $S_TIME after matching"
					dis "after matching---------------------------------------"
					tab ee if _weight!=.
					tab _support
					save `temp1', replace
				}
				else{
					* Too few treated accounts: keep the cell unmatched and flag it
					dis "subset sqft_quentile==`sqft' climate_zones_id==`climate' vintage_id==`vin' has insufficient ee==1 observations (`n_treated'<=30); matching skipped"
					tab ee
					cap gen insufficient_ee=.
					replace insufficient_ee=1
					save `temp1', replace
				}
				* Stack this cell onto the running result
				use `temp2',clear
				append using `temp1'
				save `temp2',replace
			}
			else{
				dis "subset sqft_quentile==`sqft' climate_zones_id==`climate' vintage_id==`vin' has no observation"
			}
		}
	}

}
dis "$S_DATE, $S_TIME code END"
* Reload the accumulator explicitly: when the last cell visited is empty, the
* data in memory is not temp2, so -drop test- would fail and the stacked
* result would never be saved.
use `temp2',clear
drop test
save "./matchList3/matchList3_single_family_1015", replace
dis "$S_DATE, $S_TIME code END"
cap log close

****************************************multi_family (Need to run on Hoffman************************************
* ---------------------------------------------------------------------------
* multi_family: run psmatch2 Mahalanobis matching of ee on the List 3
* covariates separately inside every vintage_id x climate_zones_id x
* sqft_quentile cell, and stack the per-cell results into one file.
*   - cells with more than 30 ee==1 accounts are matched;
*   - cells with 30 or fewer are kept unmatched and flagged insufficient_ee=1;
*   - empty cells are skipped.
* Output: ./matchList3/matchList3_multi_family_1015
* ---------------------------------------------------------------------------
cd V:\PIER_Data\Yating\Data
cap log close
log using .\test_multi_family_matching_List3.log, replace
set more off
cap tempfile temp1 temp2

* Seed the accumulator (temp2) with an empty placeholder dataset so that the
* first -append- inside the loop has something to append to.
clear
gen test=.
save `temp2',replace

use "./account_usetype_parcel_block_multi_family_100sqftImpute",clear
tempfile impute

***new (deal with those missing)
* Recode missing cell identifiers to 0 so those accounts form their own cells
* instead of never matching any loop value.
foreach x in sqft_quentile climate_zones_id vintage_id{
 count if `x'==.
 if r(N)>0 {
	replace `x'=0 if `x'==.
	}
}
save `impute', replace

* The covariate list does not change between cells, so define it once.
local covariates1 "median_income density poverty_pop white black asian hispanic prct_pop25_morebachelor prct_owner occupancy_rate reside x y age care pool_yes"

dis "$S_DATE, $S_TIME code start"
forvalues vin=0/4{
	foreach climate in 0 6 11 12 13 14 15 16{
		forvalues sqft=0/100{
			dis "$S_DATE, $S_TIME loop1"
			* Load only the current cell rather than the full file followed by -keep if-
			use if sqft_quentile==`sqft' & climate_zones_id==`climate' & vintage_id==`vin' using `impute', clear
			*gen ee_1012=1 if ee_account=="t"
			*replace ee_1012=0 if ee_account=="f"
			count
			local n_cell = r(N)
			dis "obervation in sqft_quentile==`sqft'& climate_zones_id==`climate'& vintage_id==`vin'"
			if `n_cell'>0 {
				dis "$S_DATE, $S_TIME before matching"
				count if ee==1
				local n_treated = r(N)
				if `n_treated'>30 {
					xi: psmatch2 ee, mahal(`covariates1')
					dis "$S_DATE, $S_TIME after matching"
					dis "after matching---------------------------------------"
					tab ee if _weight!=.
					tab _support
					save `temp1', replace
				}
				else{
					* Too few treated accounts: keep the cell unmatched and flag it
					dis "subset sqft_quentile==`sqft' climate_zones_id==`climate' vintage_id==`vin' has insufficient ee==1 observations (`n_treated'<=30); matching skipped"
					tab ee
					cap gen insufficient_ee=.
					replace insufficient_ee=1
					save `temp1', replace
				}
				* Stack this cell onto the running result
				use `temp2',clear
				append using `temp1'
				save `temp2',replace
			}
			else{
				dis "subset sqft_quentile==`sqft' climate_zones_id==`climate' vintage_id==`vin' has no observation"
			}
		}
	}

}
dis "$S_DATE, $S_TIME code END"
* Reload the accumulator explicitly: when the last cell visited is empty, the
* data in memory is not temp2, so -drop test- would fail and the stacked
* result would never be saved.
use `temp2',clear
drop test
save "./matchList3/matchList3_multi_family_1015",replace
dis "$S_DATE, $S_TIME code END"
cap log close

****************************************condo***********************************
* ---------------------------------------------------------------------------
* condo: run psmatch2 Mahalanobis matching of ee on the List 3 covariates
* separately inside every vintage_id x climate_zones_id cell (no square-footage
* split for this use type), and stack the per-cell results into one file.
*   - cells with more than 30 ee==1 accounts are matched;
*   - cells with 30 or fewer are kept unmatched and flagged insufficient_ee=1;
*   - empty cells are skipped.
* Output: ./matchList3/matchList3_condo_1015
* ---------------------------------------------------------------------------
cd V:\PIER_Data\Yating\Data
cap log close
log using .\test_condo_matching_List3.log, replace
set more off
cap tempfile temp1 temp2

* Seed the accumulator (temp2) with an empty placeholder dataset so that the
* first -append- inside the loop has something to append to.
clear
gen test=.
save `temp2',replace

use "./account_usetype_parcel_block_condo_100sqftImpute",clear
tempfile impute

***new (deal with those missing)
* Recode missing cell identifiers to 0 so those accounts form their own cells
* instead of never matching any loop value.
foreach x in sqft_quentile climate_zones_id vintage_id{
 count if `x'==.
 if r(N)>0 {
	replace `x'=0 if `x'==.
	}
}
save `impute', replace

* The covariate list does not change between cells, so define it once.
local covariates1 "median_income density poverty_pop white black asian hispanic prct_pop25_morebachelor prct_owner occupancy_rate reside x y age care pool_yes"

dis "$S_DATE, $S_TIME code start"
forvalues vin=0/4{
	foreach climate in 0 6 11 12 13 14 15 16{
			dis "$S_DATE, $S_TIME loop1"
			* Load only the current cell rather than the full file followed by -keep if-
			use if climate_zones_id==`climate' & vintage_id==`vin' using `impute', clear
			*gen ee_1012=1 if ee_account=="t"
			*replace ee_1012=0 if ee_account=="f"
			count
			local n_cell = r(N)
			dis "obervation in  climate_zones_id==`climate'& vintage_id==`vin'"
			if `n_cell'>0 {
				dis "$S_DATE, $S_TIME before matching"
				count if ee==1
				local n_treated = r(N)
				if `n_treated'>30 {
					xi: psmatch2 ee, mahal(`covariates1')
					dis "$S_DATE, $S_TIME after matching"
					dis "after matching---------------------------------------"
					tab ee if _weight!=.
					tab _support
					save `temp1', replace
				}
				else{
					* Too few treated accounts: keep the cell unmatched and flag it
					dis "subset climate_zones_id==`climate' vintage_id==`vin' has insufficient ee==1 observations (`n_treated'<=30); matching skipped"
					tab ee
					cap gen insufficient_ee=.
					replace insufficient_ee=1
					save `temp1', replace
				}
				* Stack this cell onto the running result
				use `temp2',clear
				append using `temp1'
				save `temp2',replace
			}
			else{
				dis "subset climate_zones_id==`climate' vintage_id==`vin' has no observation"
			}
	}
}
dis "$S_DATE, $S_TIME code END"
* Reload the accumulator explicitly: when the last cell visited is empty, the
* data in memory is not temp2, so -drop test- would fail and the stacked
* result would never be saved.
use `temp2',clear
drop test
save "./matchList3/matchList3_condo_1015",replace
dis "$S_DATE, $S_TIME code END"
cap log close


****************************************mixed_use************************************
* ---------------------------------------------------------------------------
* mixed_use: run psmatch2 Mahalanobis matching of ee on the List 3 covariates
* separately inside every vintage_id x climate_zones_id cell (no square-footage
* split for this use type), and stack the per-cell results into one file.
*   - cells with more than 30 ee==1 accounts are matched;
*   - cells with 30 or fewer are kept unmatched and flagged insufficient_ee=1;
*   - empty cells are skipped.
* Output: ./matchList3/matchList3_mixed_use_1015
* ---------------------------------------------------------------------------
cd V:\PIER_Data\Yating\Data
cap log close
log using .\test_mixed_use_matching_List3.log, replace
set more off
cap tempfile temp1 temp2

* Seed the accumulator (temp2) with an empty placeholder dataset so that the
* first -append- inside the loop has something to append to.
clear
gen test=.
save `temp2',replace

use "./account_usetype_parcel_block_mixed_use_100sqftImpute",clear
tempfile impute

***new (deal with those missing)
* Recode missing cell identifiers to 0.
foreach x in sqft_quentile climate_zones_id vintage_id{
 count if `x'==.
 if r(N)>0 {
	replace `x'=0 if `x'==.
	}
}
save `impute', replace

* The covariate list does not change between cells, so define it once.
local covariates1 "median_income density poverty_pop white black asian hispanic prct_pop25_morebachelor prct_owner occupancy_rate reside x y age care pool_yes"

* NOTE(review): these loops start at vintage 1 and omit climate zone 0, so any
* account whose vintage_id or climate_zones_id was recoded from missing to 0
* above never enters a cell and is absent from the output. The other use-type
* sections in this file loop over 0 as well - confirm the exclusion is intended.
dis "$S_DATE, $S_TIME code start"
forvalues vin=1/4{
	foreach climate in 6 11 12 13 14 15 16{
			dis "$S_DATE, $S_TIME loop1"
			* Load only the current cell rather than the full file followed by -keep if-
			use if climate_zones_id==`climate' & vintage_id==`vin' using `impute', clear
			*gen ee_1012=1 if ee_account=="t"
			*replace ee_1012=0 if ee_account=="f"
			count
			local n_cell = r(N)
			dis "obervation in  climate_zones_id==`climate'& vintage_id==`vin'"
			if `n_cell'>0 {
				dis "$S_DATE, $S_TIME before matching"
				count if ee==1
				local n_treated = r(N)
				if `n_treated'>30 {
					xi: psmatch2 ee, mahal(`covariates1')
					dis "$S_DATE, $S_TIME after matching"
					dis "after matching---------------------------------------"
					tab ee if _weight!=.
					tab _support
					save `temp1', replace
				}
				else{
					* Too few treated accounts: keep the cell unmatched and flag it
					dis "subset climate_zones_id==`climate' vintage_id==`vin' has insufficient ee==1 observations (`n_treated'<=30); matching skipped"
					tab ee
					cap gen insufficient_ee=.
					replace insufficient_ee=1
					save `temp1', replace
				}
				* Stack this cell onto the running result
				use `temp2',clear
				append using `temp1'
				save `temp2',replace
			}
			else{
				dis "subset climate_zones_id==`climate' vintage_id==`vin' has no observation"
			}
	}
}
dis "$S_DATE, $S_TIME code END"
* Reload the accumulator explicitly: when the last cell visited is empty, the
* data in memory is not temp2, so -drop test- would fail and the stacked
* result would never be saved.
use `temp2',clear
drop test
save "./matchList3/matchList3_mixed_use_1015",replace
dis "$S_DATE, $S_TIME code END"
cap log close

****************************************residential_other************************************
* ---------------------------------------------------------------------------
* residential_other: run psmatch2 Mahalanobis matching of ee on the List 3
* covariates separately inside every vintage_id x climate_zones_id cell (no
* square-footage split for this use type), and stack the per-cell results into
* one file.
*   - cells with more than 30 ee==1 accounts are matched;
*   - cells with 30 or fewer are kept unmatched and flagged insufficient_ee=1;
*   - empty cells are skipped.
* Output: ./matchList3/matchList3_residential_other_1015
* ---------------------------------------------------------------------------
cd V:\PIER_Data\Yating\Data
cap log close
log using .\test_residential_other_matching_List3.log, replace
set more off
cap tempfile temp1 temp2

* Seed the accumulator (temp2) with an empty placeholder dataset so that the
* first -append- inside the loop has something to append to.
clear
gen test=.
save `temp2',replace

use "./account_usetype_parcel_block_residential_other_100sqftImpute",clear
tempfile impute

***new (deal with those missing)
* Recode missing cell identifiers to 0.
foreach x in sqft_quentile climate_zones_id vintage_id{
 count if `x'==.
 if r(N)>0 {
	replace `x'=0 if `x'==.
	}
}
save `impute', replace

* The covariate list does not change between cells, so define it once.
local covariates1 "median_income density poverty_pop white black asian hispanic prct_pop25_morebachelor prct_owner occupancy_rate reside x y age care pool_yes"

* NOTE(review): these loops start at vintage 1 and omit climate zone 0, so any
* account whose vintage_id or climate_zones_id was recoded from missing to 0
* above never enters a cell and is absent from the output. The other use-type
* sections in this file loop over 0 as well - confirm the exclusion is intended.
dis "$S_DATE, $S_TIME code start"
forvalues vin=1/4{
	foreach climate in 6 11 12 13 14 15 16{
			dis "$S_DATE, $S_TIME loop1"
			* Load only the current cell rather than the full file followed by -keep if-
			use if climate_zones_id==`climate' & vintage_id==`vin' using `impute', clear
			*gen ee_1012=1 if ee_account=="t"
			*replace ee_1012=0 if ee_account=="f"
			count
			local n_cell = r(N)
			dis "obervation in  climate_zones_id==`climate'& vintage_id==`vin'"
			if `n_cell'>0 {
				dis "$S_DATE, $S_TIME before matching"
				count if ee==1
				local n_treated = r(N)
				if `n_treated'>30 {
					xi: psmatch2 ee, mahal(`covariates1')
					dis "$S_DATE, $S_TIME after matching"
					dis "after matching---------------------------------------"
					tab ee if _weight!=.
					tab _support
					save `temp1', replace
				}
				else{
					* Too few treated accounts: keep the cell unmatched and flag it
					dis "subset climate_zones_id==`climate' vintage_id==`vin' has insufficient ee==1 observations (`n_treated'<=30); matching skipped"
					tab ee
					cap gen insufficient_ee=.
					replace insufficient_ee=1
					save `temp1', replace
				}
				* Stack this cell onto the running result
				use `temp2',clear
				append using `temp1'
				save `temp2',replace
			}
			else{
				dis "subset climate_zones_id==`climate' vintage_id==`vin' has no observation"
			}
	}
}
dis "$S_DATE, $S_TIME code END"
* Reload the accumulator explicitly: when the last cell visited is empty, the
* data in memory is not temp2, so -drop test- would fail and the stacked
* result would never be saved.
use `temp2',clear
drop test
save "./matchList3/matchList3_residential_other_1015",replace
dis "$S_DATE, $S_TIME code END"
cap log close

****NOt yet done!!!!!*
*******************************combine together***************************
***Keep all those on common support
*redo it remember to trim if _weight!=. | insufficient_ee==1
*fix city


* Combine the per-use-type match results into one List 3 account file.
* For each use type, keep accounts that received a psmatch2 weight or that sit
* in a cell flagged insufficient_ee, then stack all use types and save the
* result, sorted by accounts_id, to the tempfile `List3' for the merge below.
cd V:\PIER_Data\Yating\Data
foreach x in condo mixed_use multi_family residential_other single_family{
	use "./matchList3/matchList3_`x'_1015", clear
	* A use-type file lacks _weight when no cell was matched, and lacks
	* insufficient_ee when no cell was flagged; create the absent variable as
	* all-missing so the filter below does not stop with "variable not found".
	foreach v in _weight insufficient_ee {
		capture confirm variable `v', exact
		if _rc {
			gen double `v' = .
		}
	}
	***keep those on support
	keep if _weight!=. |  insufficient_ee==1
	tab ee
	tempfile `x'
	save ``x''
}
use `condo',clear
foreach x in mixed_use multi_family residential_other single_family{
	append using ``x''
}

tempfile List3
* Sorted because the old-syntax match merge that consumes `List3' needs sorted data
sort accounts_id
save `List3'


****merge with rand rand List3****
* Restrict each yearly consumption file (2010-2014) to the List 3 accounts:
* match-merge on accounts_id against `List3' and keep matched rows only
* (_merge==3), holding each year's result in its own tempfile.
forvalues yr = 2010/2014 {
	use .\account_usetype_parcel_block_cons_`yr'.dta, clear
	cap drop _merge
	sort accounts_id
	merge accounts_id using `List3'
	tab _merge
	keep if _merge==3
	tempfile cons`yr'
	save `cons`yr''
}

* First deliverable: 2010-2012 stacked
use `cons2010', clear
forvalues yr = 2011/2012 {
	append using `cons`yr''
}
save .\account_usetype_parcel_block_cons_2010_2012_matchList3_ee1015_Final,replace

* Second deliverable: extend the same data in memory with 2013-2014
forvalues yr = 2013/2014 {
	append using `cons`yr''
}
save .\account_usetype_parcel_block_cons_2010_2014_matchList3_ee1015_Final,replace
tab year
***update end here (3/7/2018)****
